***********************************************************
* Internal migration and crime in Brazil *
* Author: Eva-Maria Egger 

* Contact: egger@wider.unu.edu
***********************************************************

* This do-file creates a panel of municipalities of the Annual National Household Survey (PNAD) from 2001 to 2009.
	
***********************************************************
** Set globals for directories

global tables
global graphs 
global data

*set directory

cd 

***********************************************************
* Data available at the website of the Brazilian National Institute of Geography and Statistics 
*	https://www.ibge.gov.br/estatisticas/sociais/trabalho/19897-sintese-de-indicadores-pnad2.html?=&t=microdados

	*INPUTS: 
		*PES2001.txt
		*PES2002.txt
		*PES2003.txt
		*PES2004.txt
		*PES2005.txt
		*PES2006.txt
		*PES2007.txt
		*PES2008.txt
		*PES2009.txt
		*"PNAD_municip-codes.xlsx"
		
	*OUTPUTS:
		*"PNAD_2001-09.dta"

***********************************************************

tempfile hh09 hh08 hh07 hh06 hh05 hh04 hh03 hh02 hh01 i09 i08 i07 i06 i05 i04 i03 i02 i01 a09 a08 a07 a06 a05 a04 a03 a02 a01
clear

forv y=1/9{

	if `y'==9{
	//2009
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 68 educ_high 758 hours 665 work_active 673 work_emp_main 674 ///
		work_stat_main 675-676 activ_group 677-678 occup 679-680 inc_mth 693-704 weight_i 747-751 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2008
	if `y'==8{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 67 educ_high 753 hours 660 work_active 668 work_emp_main 669 ///
		work_stat_main 670-671 activ_group 672-673 occup 674-675 inc_mth 688-699 weight_i 742-746 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2007
	if `y'==7{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 67 educ_high 749 hours 656 work_active 664 work_emp_main 665 ///
		work_stat_main 666-667 activ_group 668-669 occup 670-671 inc_mth 684-695 weight_i 738-742 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2006
	if `y'==6{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 67 educ_high 818 hours 711 work_active 719 work_emp_main 720 ///
		work_stat_main 721-722 activ_group 723-724 occup 725-726 inc_mth 739-750 weight_i 807-811 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2005
	if `y'==5{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 63 educ_high 809 hours 702 work_active 710 work_emp_main 711 ///
		work_stat_main 712-713 activ_group 714-715 occup 716-717 inc_mth 730-741 weight_i 798-802 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2004
	if `y'==4{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 62 educ_high 794 hours 687 work_active 695 work_emp_main 696 ///
		work_stat_main 697-698 activ_group 699-700 occup 701-702 inc_mth 715-726 weight_i 783-785 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2003
	if `y'==3{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 62 educ_high 898 hours 791 work_active 799 work_emp_main 800 ///
		work_stat_main 801-802 activ_group 803-804 occup 805-806 inc_mth 819-830 weight_i 887-891 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2002
	if `y'==2{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 62 educ_high 779 hours 672 work_active 680 work_emp_main 681 ///
		work_stat_main 682-683 activ_group 684-685 occup 686-687 inc_mth 700-711 weight_i 768-772 using "PNAD_200`y'\dados\PES200`y'.txt"
	}
	//2001
	if `y'==1{
		infix str hhid 5-15 V0102 5-12 year 1-4 UF 5-6 sex 18 age 27-29 school 65 educ_high 730 hours 626 work_active 633 work_emp_main 634 ///
		work_stat_main 635-636 activ_group 637-638 occup 639 inc_mth 652-663 weight_i 720-724 using "PNAD_200`y'\dados\PES200`y'.txt"
	}


	*recode
	recode sex (4=0) (2=1)
	ren sex male 
	* restricting data to relevant sub-sample
	drop if work_active==2 //only active labour force	
	keep if school==4 // only workers currently not in education
	drop if age<=15 | age>65 // only workers who are most likely to be finished with education
	*define skill groups: 
	g skills=occup
	drop if skills==10 | skills==0 // drop undefined and military or police
	recode skills (1=3) (3=2) (4=2) (5=2) (6=2) (7=2) (8=2) (9=1)
	la define skills 1 "unskilled" 2 "skilled" 3 "high-skilled"
	la value skills skills
	tab skills, g(skills)
	*gen dummies for variables that shall be aggregated to municipality level, =1 for category we are interested in
	g formal_ = cond((work_stat_main>=1 & work_stat_main<=3) | work_stat_main==6 | work_stat_main==10, 1, 0, .) //signed work card
	g highe_ = cond(educ_high==5 | educ_high==5, 1, 0, .) // 11 or more years of education
	g highs_ = cond(occup==1 | occup==2 | occup==3, 1, 0, .) //high skilled according to ISCO classification
	g hrwage_ = ((inc_mth/4)/hours)
	g lwage_ = log(inc_mth)
	g lhwage_ = log(hrwage_)
	g lhwageH_=lhwage_ if highs==1 //wage for high skilled
	g lhwageL_=lhwage_ if highs==0 //wage for low skilled
	g unemployed_=cond(work_emp_main==2, 1, 0, .)
	rename work_active active_
	g agriculture_=cond(activ_group==1, 1, 0, .) //(agriculture)
	g construction_ = cond(activ_group==4, 1, 0, .) 
	g manufac_ = cond(activ_group==3, 1, 0, .) 
	g manuwage_ = lhwage_ if manufac_==1 // wage of workers in transformative industry
	* g dummy for being young:
	g young_ = cond(age>=16 & age<=25, 1, 0, .)
	* g dummy for being young and unemployed
	g youngUE_ = cond(age>=16 & age<=25 & work_emp_main==2, 1, 0, .)
	* g dummy for being young, male and unemployed
	g ymaleUE_= (youngUE_ & male==1)
	* g dummy for being young, male, low-educated and unemployed
	g ymaleleUE_= (youngUE_ & male==1 & educ_high<3)
	*g dummy for being male, low-educated and unemployed
	g maleleUE_= (work_emp_main==2 & male==1 & educ_high<3)
	*sector share
	forv s=1/13{
		g sect`s'_=cond(activ_group==`s', 1, 0, .)
	}
	*wage for informal or formal workers:
	g lwage_inf_ = lhwage_ if formal==0
	g lwage_f_ = lhwage_ if formal==1

	
keep year hhid V0102 UF sect* skills* active_ unemployed_ agriculture_ construction_ manufac_ manuwage_ lwage_ lhwage_ lhwageH_ lhwageL_ lwage_inf_ lwage_f_ highe_ highs_ young_ youngUE_ ymaleUE_ ymaleleUE_ maleleUE_ formal_ weight_i work_stat_main
sort hhid
save `i0`y''
clear

	if `y'==1{
	//2001
		infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 83 UPA 159-161 V4602 88-91 rent 31-42 sector1 82 hhweight 142-146 using "PNAD_200`y'\dados\DOM200`y'.txt"
		sort hhid
		save `hh0`y''
		clear
	}
	if `y'==2{
	//2002
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 90 UPA 166-168 V4602 95-98 rent 32-43 sector1 89 hhweight 149-153 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
clear
	}
	if `y'==3{
//2003
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 90 UPA 166-168 V4602 95-98 rent 32-43 sector1 89 hhweight 149-153 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
clear
	}
	if `y'==4{
//2004
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 143 UPA 216-218 V4602 148-151 rent 31-42 sector1 142 hhweight 199-203 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
clear
	}
	if `y'==5{
//2005
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 85 UPA 158-160 V4602 90-93 rent 31-42 sector1 84 hhweight 141-145 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
clear
	}
	if `y'==6{
//2006
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 85 UPA 158-160 V4602 90-93 rent 31-42 sector1 84 hhweight 141-145 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
	clear
	}
	if `y'==7{
	//2007
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 83 UPA 156-158 V4602 88-91 rent 31-42 sector1 82 hhweight 139-143 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
clear
	}
	if `y'==8{
	//2008
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 85 UPA 158-160 V4602 90-93 rent 31-42 sector1 84 hhweight 141-145 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
clear
	}
	if `y'==9{
//2009
infix year 1-4 str hhid 5-15 UF 5-6 V0102 5-12 V4107 85 UPA 158-160 V4602 90-93 rent 31-42 sector1 84 hhweight 141-145 using "PNAD_200`y'\dados\DOM200`y'.txt"
sort hhid
save `hh0`y''
	}
}

//merge individual with hh level data
forv y=1/9{
	use `i0`y'', clear
	merge m:1 hhid using `hh0`y'', nogen keep(3)
	save `a0`y''
}

use `a09', clear
append using `a08' `a07' `a06' `a05' `a04' `a03' `a02' `a01'

***********************************************************.
***** BRAZILIAN MUNICIPALITIES IDENTIFICATION IN PNAD *****.
***** DECADE OF 2000s YEARS - MASTER SAMPLE ***************.
***** 817 municipalities for Panel Data Analysis **********.
***** by VLADIMIR FERNANDES MACIEL ************************.
***** for IDB - March 2013 ********************************.
***********************************************************.

*** Municipalities in Metropolitan Areas (=139) ***.

gen METROPOLITAN = 1
replace METROPOLITAN = 0 if V4107==2|V4107==3
gen STRATUM_MET = (UF*100000000) + UPA if METROPOLITAN==1
replace STRATUM_MET = 0 if STRATUM_MET==.


*** Auto-Representative Municipalities in Non-Metropolitan Areas (=134) ***.

gen AUTOREP = 1
replace AUTOREP = 0 if V4107==1|V4107==3
gen STRATUM_AR = (UF*100000000) + UPA if AUTOREP==1
replace STRATUM_AR = 0 if STRATUM_AR==.


*** Non Auto-Representative Municipalities in Non-Metropolitan Areas (=544) ***.

gen NONAUTOREP = 1
replace NONAUTOREP = 0 if V4107==1|V4107==2
gen STRATUM_NAR = (UF*100000000) + (99*1000000) + (V4602*10000) if NONAUTOREP==1
replace STRATUM_NAR = 0 if STRATUM_NAR==.


**** Identified Information about Municipalities ****************.
**** PSU - Primary Sample Unit - and Dwelling Units per PSU *****.
gen STRATUM = STRATUM_MET + STRATUM_AR + STRATUM_NAR
gen MUN =(UF*100000) + UPA
gen PSU = V0102*1000 if NONAUTOREP==0
replace PSU = (UF*1000000) + (V4602*10000) + UPA if PSU==.
sort UF PSU
gen CONTROL=1
by PSU, sort: egen DWELLING=total(CONTROL)
sort MUN
tostring MUN, g(municip)
drop V4107 UPA V4602 METROPOLITAN STRATUM_MET AUTOREP STRATUM_AR NONAUTOREP STRATUM_NAR STRATUM MUN PSU CONTROL DWELLING

// collapse to municipality level in order to get rent
bysort municip year: egen rent_md = median(rent)
g sector = cond(sector1==1, 1, 0, .)

collapse (mean) rent rent_md sector active_ unemployed_ agriculture_ construction_ manufac_ manuwage_ lwage_  ///
		lhwage_ lhwageH_ lhwageL_ lwage_f_ lwage_inf_ highe_ highs_ young_ youngUE_ ymaleUE_ ymaleleUE_ maleleUE_ formal_ sect1_ sect2_ sect3_ sect4_ sect5_ sect6_ sect7_ sect8_ ///
		sect9_ sect10_ sect11_ sect12_ sect13_ skills1 skills2 skills3 [pw=weight_i], by(municip year)
sort municip
g mc = _n
xtset mc year

g emp = (1-unemployed)
forv s=1/13{
	bysort year: egen totpsect_`s' = mean(sect`s'_)
	g p_sector`s'=(sect`s'_/emp)
	* specialisation index:
	g spec_`s'=((p_sector`s' - totpsect_`s')^2)
	drop totpsect_`s' p_sector`s' 
	}
	egen specsum_=rowtotal(spec_1-spec_13)	
	drop spec_1-spec_13


save "$data\PNAD_2001-09.dta", replace


// correct codes to make them the same as used by the IBGE
clear
import excel using "$data\PNAD_municip-codes.xlsx", first
sort municip
tostring municip, replace
merge 1:m municip using "$data\PNAD_2001-09.dta", nogen keep(3)
drop municip
rename MUNICIP municip

xtset municip year
drop mc

tostring municip, replace
*generate a dummy that identifies these Municipalities as part of the PNAD panel:
g PNAD = 1

compress

save "$data\PNAD_2001-09_p2.dta", replace

*done*

